Load the packages we need for the analysis.

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.2
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1     √ purrr   0.2.4
## √ tibble  1.3.4     √ dplyr   0.7.4
## √ tidyr   0.7.2     √ stringr 1.2.0
## √ readr   1.1.1     √ forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.2
## Warning: package 'purrr' was built under R version 3.4.2
## Warning: package 'dplyr' was built under R version 3.4.2
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
library(ggridges)
library(ggthemes)

# Chunk-wide figure defaults for the knitted R Markdown document.
knitr::opts_chunk$set(
  fig.width = 12,
  fig.height = 8,
  out.width = "80%"
)

# Apply the black-and-white ggplot2 theme to all subsequent plots.
theme_set(theme_bw())

First we import the data and clean it.

# Path to the atlas workbook, kept in one place so it only has to be changed
# once. NOTE(review): the spelling "enviroment" is reproduced as-is from the
# original code; presumably it matches the file name on disk -- confirm.
atlas_path <- "../food_enviroment_atlas.xls"

# Read one worksheet of the atlas workbook and normalise its column names
# with janitor::clean_names().
#
# sheet: name of the worksheet to read.
# Returns the sheet as returned by readxl::read_xls(), with cleaned names.
read_atlas_sheet <- function(sheet) {
  readxl::read_xls(atlas_path, sheet = sheet) %>%
    clean_names()
}

# Columns below are selected by position, so the selections depend on the
# column order of each worksheet.
health <- read_atlas_sheet("HEALTH") %>%
  select(1:7)

socioeconomic <- read_atlas_sheet("SOCIOECONOMIC") %>%
  select(1:3, 10:18)

assistance <- read_atlas_sheet("ASSISTANCE") %>%
  select(1:3, 23:29)

restaurant <- read_atlas_sheet("RESTAURANTS") %>%
  select(1:9, 16:17)

# The county supplemental sheet is kept in full.
county <- read_atlas_sheet("Supplemental Data - County")

state <- read_atlas_sheet("Supplemental Data - State") %>%
  select(1:2, 9:14, 33:40)

store <- read_atlas_sheet("STORES") %>%
  select(1:27)

Socioeconomic factors vs. diabetes and obesity

# State-level health indicators: the unweighted mean of the county values.
# na.rm = TRUE keeps a single county with a missing value from turning the
# whole state's mean into NA.
health_state <- health %>%
  group_by(state) %>%
  summarise(
    pct_diabetes_adults08 = mean(pct_diabetes_adults08, na.rm = TRUE),
    pct_diabetes_adults13 = mean(pct_diabetes_adults13, na.rm = TRUE),
    pct_obese_adults08 = mean(pct_obese_adults08, na.rm = TRUE),
    pct_obese_adults13 = mean(pct_obese_adults13, na.rm = TRUE)
  )

# State-level socioeconomic indicators, again as unweighted county means.
socioeconomic_state <- socioeconomic %>%
  group_by(state) %>%
  summarise(
    pct_65older10 = mean(pct_65older10, na.rm = TRUE),
    pct_18younger10 = mean(pct_18younger10, na.rm = TRUE),
    medhhinc15 = mean(medhhinc15, na.rm = TRUE),
    povrate15 = mean(povrate15, na.rm = TRUE),
    childpovrate15 = mean(childpovrate15, na.rm = TRUE),
    # mean() already divides by the number of counties; the earlier
    # mean(x) / n() divided by the group size a second time.
    # NOTE(review): presumably these two columns are 0/1 county flags, making
    # the mean the share of flagged counties in the state -- confirm.
    perpov10 = mean(perpov10, na.rm = TRUE),
    perchldpov10 = mean(perchldpov10, na.rm = TRUE)
  )

# County-level table: socioeconomic and health columns joined on the county
# identifiers.
social_health_whole <- merge(
  socioeconomic, health,
  by = c("fips", "state", "county")
)

# State-level table: one row per state with both sets of averaged indicators.
social_health <- merge(socioeconomic_state, health_state, by = c("state"))

# Histograms of the 2013 adult obesity and diabetes percentages in `health`,
# drawn to inspect their distributions (the author's reading: both look
# approximately normally distributed).
hist(health$pct_obese_adults13)

hist(health$pct_diabetes_adults13)

# Median household income (2015) vs. adult obesity (2013), one point per row
# of the county-level table. Points are coloured by state; geom_smooth() has
# no grouping aesthetic, so a single trend line is fitted to all points.
# The former group_by(state) was dropped: it had no effect on the plot.
social_health_whole %>%
  ggplot(aes(x = medhhinc15, y = pct_obese_adults13)) +
  # size is a constant, so it is set outside aes(); inside aes() it was
  # treated as a mapped variable, which rescaled the points and added a
  # meaningless "size" legend.
  geom_point(aes(color = state), size = 1, alpha = .6) +
  geom_smooth() +
  labs(
    x = "Median household income, 2015",
    y = "Percentage of adult obesity, 2013 "
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
## `geom_smooth()` using method = 'gam'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).

# Median household income (2015) vs. adult diabetes (2013), one point per row
# of the county-level table. Points are coloured by state; geom_smooth() has
# no grouping aesthetic, so a single trend line is fitted to all points.
# The former group_by(state) was dropped: it had no effect on the plot.
social_health_whole %>%
  ggplot(aes(x = medhhinc15, y = pct_diabetes_adults13)) +
  # size is a constant, so it is set outside aes(); inside aes() it was
  # treated as a mapped variable, which rescaled the points and added a
  # meaningless "size" legend.
  geom_point(aes(color = state), size = 1, alpha = .6) +
  geom_smooth() +
  labs(
    x = "Median household income, 2015",
    y = "Percentage of adult diabetes, 2013 "
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
## `geom_smooth()` using method = 'gam'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).

## Warning: Removed 4 rows containing missing values (geom_point).

# Adult obesity (2013) vs. adult diabetes (2013), one point per row of the
# county-level table. Points are coloured by state; geom_smooth() has no
# grouping aesthetic, so a single trend line is fitted to all points.
# The former group_by(state) was dropped: it had no effect on the plot.
social_health_whole %>%
  ggplot(aes(x = pct_obese_adults13, y = pct_diabetes_adults13)) +
  # size is a constant, so it is set outside aes(); inside aes() it was
  # treated as a mapped variable, which rescaled the points and added a
  # meaningless "size" legend.
  geom_point(aes(color = state), size = 1, alpha = .6) +
  geom_smooth() +
  labs(
    x = "Percentage of adult obesity, 2013",
    y = "Percentage of adult diabetes, 2013 "
  ) +
  theme(
    text = element_text(size = 14),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
## `geom_smooth()` using method = 'gam'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

A brief description